#All libraries
library(ggplot2)
library(wordcloud)
library(circlize)
# Show the working directory before it is changed (pasted console output below).
getwd()
## [1] "/Users/Kaul10/Desktop"
# NOTE(review): hard-coded, machine-specific directory; the script only runs
# as-is where this path exists and contains NYCLCD.csv.
setwd("/Users/Kaul10/Desktop")
# Load the data set. The code below reads its Year, Sex, Ethnicity,
# Cause.of.Death and Count columns.
NYC <- read.csv("NYCLCD.csv")
#str(NYC)
# 2007
# Records for 2007, split by sex.
nyc_2007_m <- subset(NYC, Year == 2007 & Sex == "MALE")
nyc_2007_f <- subset(NYC, Year == 2007 & Sex == "FEMALE")

# Total deaths per cause. aggregate() names the result columns
# Group.1 (cause of death) and x (summed Count), which the plots map to x/y.
m_2007 <- aggregate(nyc_2007_m$Count, by = list(nyc_2007_m$Cause.of.Death), FUN = sum)

# Every layer, including ylim(), has to be chained with `+`. A ylim() call on
# a line of its own is evaluated separately and never reaches the plot, which
# would leave this panel on a different y range from the others.
gm_2007 <- ggplot(m_2007, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "darkblue") +
  theme(
    axis.text.x = element_text(face = "bold", color = "black", size = 6, angle = 90, hjust = 1),
    axis.text.y = element_text(face = "bold", color = "black", size = 6)
  ) +
  ggtitle("Number of Deaths - Males 2007") +
  ylab("Number of Deaths") +
  xlab("") +
  ylim(0, 12000)

f_2007 <- aggregate(nyc_2007_f$Count, by = list(nyc_2007_f$Cause.of.Death), FUN = sum)
gf_2007 <- ggplot(f_2007, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "darkred") +
  theme(
    axis.text.x = element_text(face = "bold", color = "black", size = 6, angle = 90, hjust = 1),
    axis.text.y = element_text(face = "bold", color = "black", size = 6)
  ) +
  ggtitle("Number of Deaths - Females 2007") +
  ylab("Number of Deaths") +
  xlab("") +
  ylim(0, 12000)
# 2008 ----
# Split the 2008 records by sex.
nyc_2008_m <- subset(NYC, Sex == "MALE" & Year == 2008)
nyc_2008_f <- subset(NYC, Sex == "FEMALE" & Year == 2008)

# Sum Count per cause of death; the result columns are Group.1 (cause)
# and x (total), which the bar charts below map to the x and y axes.
m_2008 <- aggregate(
  x = nyc_2008_m$Count,
  by = list(Group.1 = nyc_2008_m$Cause.of.Death),
  FUN = sum
)
f_2008 <- aggregate(
  x = nyc_2008_f$Count,
  by = list(Group.1 = nyc_2008_f$Cause.of.Death),
  FUN = sum
)

# Male panel: dark blue bars, small bold axis text, cause labels rotated 90 degrees.
gm_2008 <- ggplot(m_2008, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "darkblue") +
  theme(
    axis.text.x = element_text(face = "bold", colour = "black", size = 6, angle = 90, hjust = 1),
    axis.text.y = element_text(face = "bold", colour = "black", size = 6)
  ) +
  labs(title = "Number of Deaths - Males 2008", x = "", y = "") +
  ylim(0, 12000)

# Female panel: same layout with dark red bars.
gf_2008 <- ggplot(f_2008, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "darkred") +
  theme(
    axis.text.x = element_text(face = "bold", colour = "black", size = 6, angle = 90, hjust = 1),
    axis.text.y = element_text(face = "bold", colour = "black", size = 6)
  ) +
  labs(title = "Number of Deaths - Females 2008", x = "", y = "") +
  ylim(0, 12000)
#2009
# Records for 2009, split by sex.
nyc_2009_m <- subset(NYC, Year == 2009 & Sex == "MALE")
nyc_2009_f <- subset(NYC, Year == 2009 & Sex == "FEMALE")

# Total deaths per cause; aggregate() names the columns Group.1 (cause) and x (sum).
m_2009 <- aggregate(nyc_2009_m$Count, by = list(nyc_2009_m$Cause.of.Death), FUN = sum)
# Title uses the same "Number of Deaths - <Sex> <Year>" pattern as the other
# panels (the " - " separator was missing here).
gm_2009 <- ggplot(m_2009, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "darkblue") +
  theme(
    axis.text.x = element_text(face = "bold", color = "black", size = 6, angle = 90, hjust = 1),
    axis.text.y = element_text(face = "bold", color = "black", size = 6)
  ) +
  ggtitle("Number of Deaths - Males 2009") +
  xlab("") +
  ylab("") +
  ylim(0, 12000)

f_2009 <- aggregate(nyc_2009_f$Count, by = list(nyc_2009_f$Cause.of.Death), FUN = sum)
gf_2009 <- ggplot(f_2009, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "darkred") +
  theme(
    axis.text.x = element_text(face = "bold", color = "black", size = 6, angle = 90, hjust = 1),
    axis.text.y = element_text(face = "bold", color = "black", size = 6)
  ) +
  ggtitle("Number of Deaths - Females 2009") +
  xlab("") +
  ylab("") +
  ylim(0, 12000)
# 2010 ----
# Split the 2010 records by sex.
nyc_2010_m <- subset(NYC, Sex == "MALE" & Year == 2010)
nyc_2010_f <- subset(NYC, Sex == "FEMALE" & Year == 2010)

# Sum Count per cause of death; the result columns are Group.1 (cause)
# and x (total), which the bar charts below map to the x and y axes.
m_2010 <- aggregate(
  x = nyc_2010_m$Count,
  by = list(Group.1 = nyc_2010_m$Cause.of.Death),
  FUN = sum
)
f_2010 <- aggregate(
  x = nyc_2010_f$Count,
  by = list(Group.1 = nyc_2010_f$Cause.of.Death),
  FUN = sum
)

# Male panel: dark blue bars, small bold axis text, cause labels rotated 90 degrees.
gm_2010 <- ggplot(m_2010, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "darkblue") +
  theme(
    axis.text.x = element_text(face = "bold", colour = "black", size = 6, angle = 90, hjust = 1),
    axis.text.y = element_text(face = "bold", colour = "black", size = 6)
  ) +
  labs(title = "Number of Deaths - Males 2010", x = "", y = "") +
  ylim(0, 12000)

# Female panel: same layout with dark red bars.
gf_2010 <- ggplot(f_2010, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "darkred") +
  theme(
    axis.text.x = element_text(face = "bold", colour = "black", size = 6, angle = 90, hjust = 1),
    axis.text.y = element_text(face = "bold", colour = "black", size = 6)
  ) +
  labs(title = "Number of Deaths - Females 2010", x = "", y = "") +
  ylim(0, 12000)
# 2011 ----
# Split the 2011 records by sex.
nyc_2011_m <- subset(NYC, Sex == "MALE" & Year == 2011)
nyc_2011_f <- subset(NYC, Sex == "FEMALE" & Year == 2011)

# Sum Count per cause of death; the result columns are Group.1 (cause)
# and x (total), which the bar charts below map to the x and y axes.
m_2011 <- aggregate(
  x = nyc_2011_m$Count,
  by = list(Group.1 = nyc_2011_m$Cause.of.Death),
  FUN = sum
)
f_2011 <- aggregate(
  x = nyc_2011_f$Count,
  by = list(Group.1 = nyc_2011_f$Cause.of.Death),
  FUN = sum
)

# Male panel: dark blue bars, small bold axis text, cause labels rotated 90 degrees.
gm_2011 <- ggplot(m_2011, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "darkblue") +
  theme(
    axis.text.x = element_text(face = "bold", colour = "black", size = 6, angle = 90, hjust = 1),
    axis.text.y = element_text(face = "bold", colour = "black", size = 6)
  ) +
  labs(title = "Number of Deaths - Males 2011", x = "", y = "") +
  ylim(0, 12000)

# Female panel: same layout with dark red bars.
gf_2011 <- ggplot(f_2011, aes(x = Group.1, y = x)) +
  geom_bar(stat = "identity", fill = "darkred") +
  theme(
    axis.text.x = element_text(face = "bold", colour = "black", size = 6, angle = 90, hjust = 1),
    axis.text.y = element_text(face = "bold", colour = "black", size = 6)
  ) +
  labs(title = "Number of Deaths - Females 2011", x = "", y = "") +
  ylim(0, 12000)
# Multiple plot function (adapted from cookbook-r.com).
#
# Draws several ggplot objects on one page using a grid layout.
#
# Arguments:
# - ...:      ggplot objects to draw.
# - plotlist: optional list of ggplot objects, appended after those in `...`.
# - file:     accepted for compatibility with existing callers; not used.
# - cols:     number of columns in the layout (ignored when `layout` is given).
# - layout:   a matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# When `layout` is NULL the cells are numbered column by column (matrix()
# default), so plots fill the first column top to bottom before the next.
#
# Returns invisible NULL when no plots are supplied; otherwise the function is
# called for its drawing side effect.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: return before opening a page. Without this guard the
  # 1:0 index sequence would try to print a non-existent first plot.
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  library(grid)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Rows needed to hold all plots in `cols` columns
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (num_plots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in seq_len(num_plots)) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
# Draw the ten yearly panels on one page, five columns wide. multiplot()
# numbers its layout cells column by column, so each year's male/female pair
# shares a column.
multiplot(
  plotlist = list(
    gm_2007, gf_2007,
    gm_2008, gf_2008,
    gm_2009, gf_2009,
    gm_2010, gf_2010,
    gm_2011, gf_2011
  ),
  cols = 5
)

# Distinct ethnicity values in the data (pasted console output follows).
unique(NYC[["Ethnicity"]])
## [1] ASIAN & PACIFIC ISLANDER HISPANIC
## [3] NON-HISPANIC BLACK NON-HISPANIC WHITE
## 4 Levels: ASIAN & PACIFIC ISLANDER HISPANIC ... NON-HISPANIC WHITE
# Distinct causes of death in the data (pasted console output follows).
unique(NYC[["Cause.of.Death"]])
## [1] ACCIDENTS EXCEPT DRUG POISONING
## [2] ALZHEIMERS DISEASE
## [3] AORTIC ANEURYSM AND DISSECTION
## [4] ASSAULT (HOMICIDE)
## [5] ATHEROSCLEROSIS
## [6] BENIGN AND UNCERTAIN NEOPLASMS
## [7] CEREBROVASCULAR DISEASE
## [8] CHRONIC LIVER DISEASE AND CIRRHOSIS
## [9] CHRONIC LOWER RESPIRATORY DISEASES
## [10] DIABETES MELLITUS
## [11] DISEASES OF HEART
## [12] ESSENTIAL HYPERTENSION AND RENAL DISEASES
## [13] INFLUENZA AND PNEUMONIA
## [14] INTENTIONAL SELF-HARM (SUICIDE)
## [15] MALIGNANT NEOPLASMS
## [16] NEPHRITIS, NEPHROTIC SYNDROME AND NEPHROSIS
## [17] PARKINSONS DISEASE
## [18] PEPTIC ULCER
## [19] VIRAL HEPATITIS
## [20] CARDIOVASCULAR DISORDERS IN PERINATAL PERIOD
## [21] CONGENITAL MALFORMATIONS,DEFORMATIONS
## [22] MENTAL DISORDERS DUE TO USE OF ALCOHOL
## [23] PSYCH. SUBSTANCE USE & ACCIDENTAL DRUG POISONING
## [24] SEPTICEMIA
## [25] CHOLELITHIASIS AND DISORDERS OF GALLBLADDER
## [26] HUMAN IMMUNODEFICIENCY VIRUS DISEASE
## [27] PREGNANCY, CHILDBIRTH AND THE PUERPERIUM
## [28] SHORT GESTATION/LBW
## [29] RESPIRATORY DISTRESS OF NEWBORN
## [30] ANEMIAS
## [31] PNEUMONITIS DUE TO SOLIDS AND LIQUIDS
## [32] TUBERCULOSIS
## 32 Levels: ACCIDENTS EXCEPT DRUG POISONING ALZHEIMERS DISEASE ... VIRAL HEPATITIS
# Chronic lower respiratory diseases: Count against Year, one panel per
# sex/ethnicity combination (odd numbers = male, even numbers = female).

# Non-Hispanic Black
cod_1 <- subset(
  NYC,
  Cause.of.Death == "CHRONIC LOWER RESPIRATORY DISEASES" &
    Sex == "MALE" &
    Ethnicity == "NON-HISPANIC BLACK"
)
cod_2 <- subset(
  NYC,
  Cause.of.Death == "CHRONIC LOWER RESPIRATORY DISEASES" &
    Sex == "FEMALE" &
    Ethnicity == "NON-HISPANIC BLACK"
)
# Asian & Pacific Islander
cod_3 <- subset(
  NYC,
  Cause.of.Death == "CHRONIC LOWER RESPIRATORY DISEASES" &
    Sex == "MALE" &
    Ethnicity == "ASIAN & PACIFIC ISLANDER"
)
cod_4 <- subset(
  NYC,
  Cause.of.Death == "CHRONIC LOWER RESPIRATORY DISEASES" &
    Sex == "FEMALE" &
    Ethnicity == "ASIAN & PACIFIC ISLANDER"
)
# Non-Hispanic White
cod_5 <- subset(
  NYC,
  Cause.of.Death == "CHRONIC LOWER RESPIRATORY DISEASES" &
    Sex == "MALE" &
    Ethnicity == "NON-HISPANIC WHITE"
)
cod_6 <- subset(
  NYC,
  Cause.of.Death == "CHRONIC LOWER RESPIRATORY DISEASES" &
    Sex == "FEMALE" &
    Ethnicity == "NON-HISPANIC WHITE"
)

# Large, semi-transparent green filled circles on a black-and-white theme.
g1 <- ggplot(cod_1, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g2 <- ggplot(cod_2, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g3 <- ggplot(cod_3, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g4 <- ggplot(cod_4, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g5 <- ggplot(cod_5, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g6 <- ggplot(cod_6, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()

# NOTE(review): the panels carry no titles, and multiplot() fills its layout
# column by column, so this argument order does not put the three male panels
# on one row -- confirm the intended arrangement.
multiplot(g1, g3, g5, g2, g4, g6, cols = 3)

# Diseases of heart: Count against Year, one panel per sex/ethnicity
# combination (odd numbers = male, even numbers = female).

# Non-Hispanic Black
cod_1 <- subset(
  NYC,
  Cause.of.Death == "DISEASES OF HEART" &
    Sex == "MALE" &
    Ethnicity == "NON-HISPANIC BLACK"
)
cod_2 <- subset(
  NYC,
  Cause.of.Death == "DISEASES OF HEART" &
    Sex == "FEMALE" &
    Ethnicity == "NON-HISPANIC BLACK"
)
# Asian & Pacific Islander
cod_3 <- subset(
  NYC,
  Cause.of.Death == "DISEASES OF HEART" &
    Sex == "MALE" &
    Ethnicity == "ASIAN & PACIFIC ISLANDER"
)
cod_4 <- subset(
  NYC,
  Cause.of.Death == "DISEASES OF HEART" &
    Sex == "FEMALE" &
    Ethnicity == "ASIAN & PACIFIC ISLANDER"
)
# Non-Hispanic White
cod_5 <- subset(
  NYC,
  Cause.of.Death == "DISEASES OF HEART" &
    Sex == "MALE" &
    Ethnicity == "NON-HISPANIC WHITE"
)
cod_6 <- subset(
  NYC,
  Cause.of.Death == "DISEASES OF HEART" &
    Sex == "FEMALE" &
    Ethnicity == "NON-HISPANIC WHITE"
)

# Large, semi-transparent green filled circles on a black-and-white theme.
g1 <- ggplot(cod_1, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g2 <- ggplot(cod_2, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g3 <- ggplot(cod_3, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g4 <- ggplot(cod_4, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g5 <- ggplot(cod_5, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g6 <- ggplot(cod_6, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()

# NOTE(review): the panels carry no titles, and multiplot() fills its layout
# column by column, so this argument order does not put the three male panels
# on one row -- confirm the intended arrangement.
multiplot(g1, g3, g5, g2, g4, g6, cols = 3)

# Chronic lower respiratory diseases: Count against Year, one panel per
# sex/ethnicity combination (odd numbers = male, even numbers = female).
# NOTE(review): this section uses the same cause of death as the first
# cause-specific figure earlier in the script -- confirm whether a different
# cause was intended here.

# Non-Hispanic Black
cod_1 <- subset(
  NYC,
  Cause.of.Death == "CHRONIC LOWER RESPIRATORY DISEASES" &
    Sex == "MALE" &
    Ethnicity == "NON-HISPANIC BLACK"
)
cod_2 <- subset(
  NYC,
  Cause.of.Death == "CHRONIC LOWER RESPIRATORY DISEASES" &
    Sex == "FEMALE" &
    Ethnicity == "NON-HISPANIC BLACK"
)
# Asian & Pacific Islander
cod_3 <- subset(
  NYC,
  Cause.of.Death == "CHRONIC LOWER RESPIRATORY DISEASES" &
    Sex == "MALE" &
    Ethnicity == "ASIAN & PACIFIC ISLANDER"
)
cod_4 <- subset(
  NYC,
  Cause.of.Death == "CHRONIC LOWER RESPIRATORY DISEASES" &
    Sex == "FEMALE" &
    Ethnicity == "ASIAN & PACIFIC ISLANDER"
)
# Non-Hispanic White
cod_5 <- subset(
  NYC,
  Cause.of.Death == "CHRONIC LOWER RESPIRATORY DISEASES" &
    Sex == "MALE" &
    Ethnicity == "NON-HISPANIC WHITE"
)
cod_6 <- subset(
  NYC,
  Cause.of.Death == "CHRONIC LOWER RESPIRATORY DISEASES" &
    Sex == "FEMALE" &
    Ethnicity == "NON-HISPANIC WHITE"
)

# Large, semi-transparent green filled circles on a black-and-white theme.
g1 <- ggplot(cod_1, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g2 <- ggplot(cod_2, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g3 <- ggplot(cod_3, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g4 <- ggplot(cod_4, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g5 <- ggplot(cod_5, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g6 <- ggplot(cod_6, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()

# NOTE(review): the panels carry no titles, and multiplot() fills its layout
# column by column, so this argument order does not put the three male panels
# on one row -- confirm the intended arrangement.
multiplot(g1, g3, g5, g2, g4, g6, cols = 3)

# Diabetes mellitus: Count against Year, one panel per sex/ethnicity
# combination (odd numbers = male, even numbers = female).

# Non-Hispanic Black
cod_1 <- subset(
  NYC,
  Cause.of.Death == "DIABETES MELLITUS" &
    Sex == "MALE" &
    Ethnicity == "NON-HISPANIC BLACK"
)
cod_2 <- subset(
  NYC,
  Cause.of.Death == "DIABETES MELLITUS" &
    Sex == "FEMALE" &
    Ethnicity == "NON-HISPANIC BLACK"
)
# Asian & Pacific Islander
cod_3 <- subset(
  NYC,
  Cause.of.Death == "DIABETES MELLITUS" &
    Sex == "MALE" &
    Ethnicity == "ASIAN & PACIFIC ISLANDER"
)
cod_4 <- subset(
  NYC,
  Cause.of.Death == "DIABETES MELLITUS" &
    Sex == "FEMALE" &
    Ethnicity == "ASIAN & PACIFIC ISLANDER"
)
# Non-Hispanic White
cod_5 <- subset(
  NYC,
  Cause.of.Death == "DIABETES MELLITUS" &
    Sex == "MALE" &
    Ethnicity == "NON-HISPANIC WHITE"
)
cod_6 <- subset(
  NYC,
  Cause.of.Death == "DIABETES MELLITUS" &
    Sex == "FEMALE" &
    Ethnicity == "NON-HISPANIC WHITE"
)

# Large, semi-transparent green filled circles on a black-and-white theme.
g1 <- ggplot(cod_1, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g2 <- ggplot(cod_2, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g3 <- ggplot(cod_3, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g4 <- ggplot(cod_4, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g5 <- ggplot(cod_5, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()
g6 <- ggplot(cod_6, aes(x = Year, y = Count)) +
  geom_point(colour = rgb(red = 0, green = 200, blue = 0, alpha = 75, maxColorValue = 255), shape = 19, size = 10) +
  theme_bw()

# NOTE(review): the panels carry no titles, and multiplot() fills its layout
# column by column, so this argument order does not put the three male panels
# on one row -- confirm the intended arrangement.
multiplot(g1, g3, g5, g2, g4, g6, cols = 3)

# Word cloud of the causes of death.
# table() counts the rows of NYC per cause, so a word's size reflects how many
# records list that cause rather than the summed Count column.
# NOTE(review): confirm that row frequency (not total deaths) is the intended weight.
df <- data.frame(table(NYC$Cause.of.Death))
par(bg = "white")
# Earlier colour scheme, kept for reference:
#wordcloud(df$Var1 , df$Freq,scale=c(2,.2), col=terrain.colors(length(df$Var1) , alpha=0.9) , rot.per=0.9 )
wordcloud(df$Var1, df$Freq, scale = c(2, .2), colors = brewer.pal(8, "Dark2"), rot.per = 0.35)

# Total deaths per cause across the whole data set.
new_df <- data.frame(COD = NYC$Cause.of.Death, Count = NYC$Count)
new <- aggregate(x = new_df$Count, by = list(Group.1 = new_df$COD), FUN = sum)
small <- new[1:11, ]

# Two chord diagrams side by side on a white background:
# rows 1-16 of the totals on the left, rows 17-32 on the right.
par(mfrow = c(1, 2), bg = "white")

chordDiagram(new[1:16, ], transparency = 0.5, annotationTrack = "grid", preAllocateTracks = 1)
# Write each sector's name in the pre-allocated track 1 and add an axis to track 2.
circos.trackPlotRegion(
  track.index = 1,
  panel.fun = function(x, y) {
    cell_xlim <- get.cell.meta.data("xlim")
    cell_ylim <- get.cell.meta.data("ylim")
    sector_name <- get.cell.meta.data("sector.index")
    circos.text(
      mean(cell_xlim), cell_ylim[1] + .1, sector_name,
      facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5)
    )
    circos.axis(
      h = "top", labels.cex = 0.5, major.tick.percentage = 0.2,
      sector.index = sector_name, track.index = 2
    )
  },
  bg.border = NA
)

chordDiagram(new[17:32, ], transparency = 0.5, annotationTrack = "grid", preAllocateTracks = 1)
# Same labelling for the second diagram.
circos.trackPlotRegion(
  track.index = 1,
  panel.fun = function(x, y) {
    cell_xlim <- get.cell.meta.data("xlim")
    cell_ylim <- get.cell.meta.data("ylim")
    sector_name <- get.cell.meta.data("sector.index")
    circos.text(
      mean(cell_xlim), cell_ylim[1] + .1, sector_name,
      facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5)
    )
    circos.axis(
      h = "top", labels.cex = 0.5, major.tick.percentage = 0.2,
      sector.index = sector_name, track.index = 2
    )
  },
  bg.border = NA
)
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD

=======

>>>>>>> master
=======
